{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import cv2\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from IPython.display import *\n",
    "from collections import Counter\n",
    "import seaborn as sns\n",
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "\n",
    "%matplotlib inline\n",
    "%config InlineBackend.figure_format = 'retina'\n",
    "# Source images are read from ../../<IMAGE_DIR>/; crops are written to CROP_DIR.\n",
    "IMAGE_DIR = 'image_contest_level_2_validate'\n",
    "CROP_DIR = 'crop_split_test'\n",
    "\n",
    "# Make sure the output directory exists before any crop is written.\n",
    "if not os.path.isdir(CROP_DIR):\n",
    "    os.mkdir(CROP_DIR)\n",
    "\n",
    "from multiprocessing import Pool, Lock, Manager"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据并行预处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def f(index):\n",
    "    \"\"\"Crop the formula regions of image ``index`` and stitch them into one strip.\n",
    "\n",
    "    Reads ``../../<IMAGE_DIR>/<index>.png``, finds candidate regions on a\n",
    "    quarter-size copy of the equalised, median-blurred gray image (morphological\n",
    "    open, then close, then inverse threshold at 127), tightens every region\n",
    "    against the full-resolution blurred image and writes each crop to\n",
    "    ``<CROP_DIR>/<index>_<i>.png``. The crops are pasted side by side in reverse\n",
    "    contour order with a 2 px gap and saved as ``<CROP_DIR>/<index>.png``.\n",
    "\n",
    "    Returns ``[index, w, h, m, n, count]``: ``m, n`` are the source height and\n",
    "    width and ``count`` is the number of crops kept. Beware of the naming:\n",
    "    ``w`` is the stitched canvas HEIGHT and ``h`` its WIDTH. When no region is\n",
    "    kept, ``w`` and ``h`` are 0 and no stitched file is written.\n",
    "\n",
    "    Raises IOError if the source image cannot be read.\n",
    "    \"\"\"\n",
    "    path = '../../%s/%d.png' % (IMAGE_DIR, index)\n",
    "    img = cv2.imread(path)\n",
    "    if img is None:\n",
    "        # cv2.imread reports failure by returning None instead of raising.\n",
    "        raise IOError('cannot read image: %s' % path)\n",
    "    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
    "\n",
    "    eq = cv2.equalizeHist(gray)\n",
    "    b = cv2.medianBlur(eq, 9)\n",
    "\n",
    "    m, n = img.shape[:2]\n",
    "    b2 = cv2.resize(b, (n//4, m//4))\n",
    "\n",
    "    m1 = cv2.morphologyEx(b2, cv2.MORPH_OPEN, np.ones((7, 40)))\n",
    "    m2 = cv2.morphologyEx(m1, cv2.MORPH_CLOSE, np.ones((4, 4)))\n",
    "    _, bw = cv2.threshold(m2, 127, 255, cv2.THRESH_BINARY_INV)\n",
    "\n",
    "    bw = cv2.resize(bw, (n, m))\n",
    "\n",
    "    # findContours returns (image, contours, hierarchy) in OpenCV 3 but\n",
    "    # (contours, hierarchy) in OpenCV 2 and 4; the contours are always the\n",
    "    # second-to-last item.\n",
    "    ctrs = cv2.findContours(bw, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]\n",
    "    if len(ctrs) > 3:\n",
    "        print(index)\n",
    "\n",
    "    # Tighten each detected formula region.\n",
    "    d = 10  # padding in pixels around every coarse bounding box\n",
    "    imgs = []\n",
    "    sizes = []\n",
    "    for i, ctr in enumerate(ctrs):\n",
    "        x, y, w, h = cv2.boundingRect(ctr)\n",
    "        if w*h < 1000:\n",
    "            continue\n",
    "        top, bottom = max(0, y-d), min(m, y+h+d)\n",
    "        left, right = max(0, x-d), min(n, x+w+d)\n",
    "        roi = img[top:bottom, left:right]\n",
    "\n",
    "        _, mask = cv2.threshold(b[top:bottom, left:right], 127, 255, cv2.THRESH_BINARY_INV)\n",
    "        inner = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]\n",
    "        if len(inner) == 0:\n",
    "            # Nothing below the threshold at full resolution; np.vstack([])\n",
    "            # would raise, so this region is skipped.\n",
    "            continue\n",
    "        x, y, w, h = cv2.boundingRect(np.vstack(inner))\n",
    "        roi2 = roi[y:y+h, x:x+w]\n",
    "        imgs.append(roi2)\n",
    "        cv2.imwrite('%s/%d_%d.png' % (CROP_DIR, index, i), roi2)\n",
    "        sizes.append((w, h))\n",
    "\n",
    "    if not imgs:\n",
    "        # No region survived: return an empty row (count 0) rather than failing\n",
    "        # on the empty size array below.\n",
    "        return [index, 0, 0, m, n, 0]\n",
    "\n",
    "    # Stitch the crops horizontally.\n",
    "    sizes = np.array(sizes)\n",
    "    # w = canvas height (tallest crop); h = canvas width (sum of crop widths\n",
    "    # plus a 2 px gap between neighbours).\n",
    "    w, h = sizes[:,1].max(), sizes[:,0].sum()+(len(sizes)-1)*2\n",
    "    img = np.zeros((w, h, 3), dtype=np.uint8)\n",
    "    x = 0\n",
    "    for a in imgs[::-1]:\n",
    "        iw = a.shape[1]\n",
    "        img[:a.shape[0], x:x+iw] = a\n",
    "        x += iw + 2\n",
    "\n",
    "    cv2.imwrite('%s/%d.png' % (CROP_DIR, index), img)\n",
    "\n",
    "    return [index, w, h, m, n, len(sizes)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "\n",
    "n = 100000\n",
    "if __name__ == '__main__':\n",
    "    # Start a fresh pool on every run. Worker processes are created when the\n",
    "    # Pool is constructed, so (with fork-based workers) a pool kept alive across\n",
    "    # re-runs could keep executing an outdated definition of f.\n",
    "    p = Pool(12)\n",
    "    rs = []\n",
    "    try:\n",
    "        for r in tqdm(p.imap_unordered(f, range(n)), total=n):\n",
    "            rs.append(r)\n",
    "    finally:\n",
    "        # Release the worker processes whether or not a task raised.\n",
    "        p.terminate()\n",
    "        p.join()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per processed image, indexed by image number `i` and sorted by it.\n",
    "# set_index moves `i` out of the columns, so no separate drop is needed (the\n",
    "# positional axis argument of DataFrame.drop is not accepted by pandas >= 2.0).\n",
    "df = (\n",
    "    pd.DataFrame(rs, columns=list('iwhmnr'))\n",
    "    .set_index('i')\n",
    "    .sort_index()\n",
    ")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows whose `r` value (last item of each result row) exceeds three.\n",
    "df.loc[df['r'] > 3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# The index is not written to the file; only the data columns are saved.\n",
    "df.to_csv('r_test.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 结果可视化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def disp2(img):\n",
    "    \"\"\"Write ``img`` to ``a.png`` in the working directory and return it as an IPython Image.\"\"\"\n",
    "    out_path = 'a.png'\n",
    "    cv2.imwrite(out_path, img)\n",
    "    return Image(out_path)\n",
    "\n",
    "\n",
    "def disp(img, txt=None, first=False, rows=2, cols=3):\n",
    "    \"\"\"Draw ``img`` in the next free cell of a subplot grid.\n",
    "\n",
    "    The current cell is tracked in the module-level counter ``index``.\n",
    "    ``first=True`` opens a new 16x9 figure and restarts at cell 1; every later\n",
    "    call advances by one cell. 2-D arrays are drawn with a gray colormap; other\n",
    "    arrays have their last axis reversed before drawing (BGR -> RGB for images\n",
    "    loaded with OpenCV).\n",
    "\n",
    "    img: array to display.\n",
    "    txt: optional title, skipped when falsy.\n",
    "    first: start a new figure.\n",
    "    rows, cols: grid shape handed to plt.subplot; the defaults keep the\n",
    "        original 2x3 layout.\n",
    "    \"\"\"\n",
    "    global index\n",
    "    if first or 'index' not in globals():\n",
    "        # Also covers a call made before any first=True call, which would\n",
    "        # otherwise fail on the undefined counter.\n",
    "        index = 1\n",
    "        plt.figure(figsize=(16, 9))\n",
    "    else:\n",
    "        index += 1\n",
    "    plt.subplot(rows, cols, index)\n",
    "    if len(img.shape) == 2:\n",
    "        plt.imshow(img, cmap='gray')\n",
    "    else:\n",
    "        plt.imshow(img[:,:,::-1])\n",
    "    if txt:\n",
    "        plt.title(txt)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 技术原理\n",
    "\n",
    "* [转灰度图](http://docs.opencv.org/master/df/d9d/tutorial_py_colorspaces.html)\n",
    "* [二值化](http://docs.opencv.org/master/d7/d4d/tutorial_py_thresholding.html)\n",
    "* [直方图均衡](http://docs.opencv.org/master/d5/daf/tutorial_py_histogram_equalization.html)\n",
    "* [中值滤波](http://docs.opencv.org/master/d4/d13/tutorial_py_filtering.html)\n",
    "* [开运算](http://docs.opencv.org/master/d9/d61/tutorial_py_morphological_ops.html)\n",
    "* [轮廓查找](http://docs.opencv.org/master/d4/d73/tutorial_py_contours_begin.html)\n",
    "* [边界矩形](http://docs.opencv.org/master/dd/d49/tutorial_py_contour_features.html)\n",
    "\n",
    "参考链接：\n",
    "\n",
    "* http://docs.opencv.org/master/df/d9d/tutorial_py_colorspaces.html\n",
    "* http://docs.opencv.org/master/d7/d4d/tutorial_py_thresholding.html\n",
    "* http://docs.opencv.org/master/d5/daf/tutorial_py_histogram_equalization.html\n",
    "* http://docs.opencv.org/master/d4/d13/tutorial_py_filtering.html\n",
    "* http://docs.opencv.org/master/d9/d61/tutorial_py_morphological_ops.html\n",
    "* http://docs.opencv.org/master/d4/d73/tutorial_py_contours_begin.html\n",
    "* http://docs.opencv.org/master/dd/d49/tutorial_py_contour_features.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot(index):\n",
    "    \"\"\"Show every stage of the cropping pipeline for image ``index``.\n",
    "\n",
    "    Draws the raw image, the equalised image, the blurred image, the two\n",
    "    morphology results and the detected bounding boxes in a 2x3 grid (via disp),\n",
    "    then returns the stitched crops as an IPython Image (via disp2).\n",
    "\n",
    "    The intermediate arrays are published as module-level globals so they can\n",
    "    be inspected from other cells afterwards; ``img`` ends up holding the\n",
    "    stitched result. Cropping uses the same area filter (w*h >= 1000) and 2 px\n",
    "    gap as the batch function f, so the preview matches the files f writes.\n",
    "\n",
    "    Raises IOError if the image cannot be read and ValueError if no formula\n",
    "    region is found.\n",
    "    \"\"\"\n",
    "    global img, gray, b, eq, bw, m1, m2, r, roi\n",
    "    path = '../../%s/%d.png' % (IMAGE_DIR, index)\n",
    "    img = cv2.imread(path)\n",
    "    if img is None:\n",
    "        # cv2.imread reports failure by returning None instead of raising.\n",
    "        raise IOError('cannot read image: %s' % path)\n",
    "    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
    "\n",
    "    eq = cv2.equalizeHist(gray)\n",
    "    b = cv2.medianBlur(eq, 9)\n",
    "\n",
    "    m, n = img.shape[:2]\n",
    "    b2 = cv2.resize(b, (n//4, m//4))\n",
    "\n",
    "    m1 = cv2.morphologyEx(b2, cv2.MORPH_OPEN, np.ones((7, 40)))\n",
    "    m2 = cv2.morphologyEx(m1, cv2.MORPH_CLOSE, np.ones((4, 4)))\n",
    "    _, bw = cv2.threshold(m2, 127, 255, cv2.THRESH_BINARY_INV)\n",
    "\n",
    "    bw = cv2.resize(bw, (n, m))\n",
    "\n",
    "    r = img.copy()\n",
    "    # findContours returns (image, contours, hierarchy) in OpenCV 3 but\n",
    "    # (contours, hierarchy) in OpenCV 2 and 4; the contours are always the\n",
    "    # second-to-last item.\n",
    "    ctrs = cv2.findContours(bw, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]\n",
    "    for ctr in ctrs:\n",
    "        x, y, w, h = cv2.boundingRect(ctr)\n",
    "        cv2.rectangle(r, (x, y), (x+w, y+h), (0, 255, 0), 10)\n",
    "\n",
    "    disp(img, 'raw img', 1)\n",
    "    disp(eq, 'eq')\n",
    "    disp(b, 'blur')\n",
    "    disp(m1, 'm1')\n",
    "    disp(m2, 'm2')\n",
    "    disp(r, 'rect')\n",
    "\n",
    "    # Tighten each detected formula region.\n",
    "    d = 10  # padding in pixels around every coarse bounding box\n",
    "    imgs = []\n",
    "    sizes = []\n",
    "    for ctr in ctrs:\n",
    "        x, y, w, h = cv2.boundingRect(ctr)\n",
    "        if w*h < 1000:\n",
    "            continue\n",
    "        top, bottom = max(0, y-d), min(m, y+h+d)\n",
    "        left, right = max(0, x-d), min(n, x+w+d)\n",
    "        roi = img[top:bottom, left:right]\n",
    "\n",
    "        _, mask = cv2.threshold(b[top:bottom, left:right], 127, 255, cv2.THRESH_BINARY_INV)\n",
    "        inner = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]\n",
    "        if len(inner) == 0:\n",
    "            # Nothing below the threshold at full resolution; np.vstack([])\n",
    "            # would raise, so this region is skipped.\n",
    "            continue\n",
    "        x, y, w, h = cv2.boundingRect(np.vstack(inner))\n",
    "        roi2 = roi[y:y+h, x:x+w]\n",
    "        imgs.append(roi2)\n",
    "        sizes.append((w, h))\n",
    "\n",
    "    if not imgs:\n",
    "        raise ValueError('no formula region found in image %d' % index)\n",
    "\n",
    "    # Stitch the crops horizontally, 2 px apart.\n",
    "    sizes = np.array(sizes)\n",
    "    img = np.zeros((sizes[:,1].max(), sizes[:,0].sum()+(len(sizes)-1)*2, 3), dtype=np.uint8)\n",
    "    x = 0\n",
    "    for a in imgs[::-1]:\n",
    "        w = a.shape[1]\n",
    "        img[:a.shape[0], x:x+w] = a\n",
    "        x += w + 2\n",
    "\n",
    "    return disp2(img)\n",
    "\n",
    "plot(95170)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
